% matlab code for Pr(underestimation) vs #sample curve

% Script overview:
%   For each sample count in numSamples, load the corresponding data file,
%   measure the fraction of values that fall at or below their own mean
%   (empirical curve), and compare it with the closed-form value obtained
%   from a log-normal model whose mean/std come from EstimateMeanStdQ
%   (theoretical curve). Both curves are plotted against the sample count.

numStates = 20;

% Per-state parameter vector handed to EstimateMeanStdQ: 0.1 for every
% state except the last one, which is 1.
% NOTE(review): presumably transition/success probabilities - confirm
% against EstimateMeanStdQ.
P = [0.1 * ones(1, numStates-1), 1];

scale = 0.01; % due to 100:1 scaling in the test.lua
gamma = 0.9;

numSamples = 20:20:400;

belowReal = zeros(size(numSamples));
theoBelowReal = zeros(size(numSamples));

for j = 1 : numel(numSamples)

    % Empirical side: load the values, drop non-positive entries and
    % undo the 100:1 scaling.
    Q = importdata("../data/170510_145110_"+numSamples(j)+".v.txt");
    Q = Q(Q>0) * scale;
    empMeanQ = mean(Q);

    % Log-normal location parameter matched to the empirical moments.
    % Diagnostic only: it is not used by the plot below.
    muQ = log((empMeanQ^2)/sqrt(std(Q)^2+empMeanQ^2));

    % Theoretical side: mean/std for the same number of observations in
    % every state, converted to the log-normal shape parameter sigma.
    N = numSamples(j) * ones(1, numStates);
    [meanQ, stdQ] = EstimateMeanStdQ(gamma, P, N);
    sigmaQ = sqrt(log(stdQ^2/(meanQ^2)+1));

    % Fraction of values at or below the empirical mean. Using mean() of
    % the logical mask (instead of dividing by size(Q, 1)) keeps the
    % result correct whether Q is a row or a column vector; an empty Q
    % still yields NaN.
    belowReal(j) = mean(Q <= empMeanQ);

    % For a log-normal X with shape sigma:
    %   P(X <= E[X]) = Phi(sigma/2) = 0.5 * (1 + erf(sigma / sqrt(8))).
    theoBelowReal(j) = 0.5* (1  + erf(sigmaQ / sqrt(8)));

end

figure
hold on
plot(numSamples, belowReal, 'LineStyle', 'none', 'Marker', 'o')
plot(numSamples, theoBelowReal)
legend('empirical', 'theoretical')
ylim([0.5 0.8])
xlabel('#observations per state-action')
ylabel('probability of underestimation')
hold off

